library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
library(plotly)
library(muhaz)
library(ggthemes)
vars <- c("id", "clinic", "status", "time", "prison", "methadone")
addicts <-
read.table(
url(
"https://dmrocke.ucdavis.edu/Class/BST222.2022.Fall/addicts.txt"
),
header = F,
col.names = vars
)
#addicts
#change variables to factors to be used in Cox PH
addicts$clinic <- factor(addicts$clinic,labels=c("Clinic1","Clinic2"))
addicts$prison <- factor(addicts$prison,labels=c("No","Yes"))
head(addicts)
## id clinic status time prison methadone
## 1 1 Clinic1 1 428 No 50
## 2 2 Clinic1 1 275 Yes 55
## 3 3 Clinic1 1 262 No 55
## 4 4 Clinic1 1 183 No 30
## 5 5 Clinic1 1 259 Yes 65
## 6 6 Clinic1 1 714 No 55
surv_object <- Surv(time = addicts$time, event = addicts$status)
addicts.cox <- coxph(surv_object ~ clinic + prison + methadone, data = addicts)
#summary(addicts.cox)
addicts.zph <- cox.zph(addicts.cox)
addicts.zph
## chisq df p
## clinic 11.159 1 0.00084
## prison 0.839 1 0.35969
## methadone 0.515 1 0.47310
## GLOBAL 12.287 3 0.00646
#For clinic
ggcoxzph(addicts.zph[1], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-7,7))
#For prison
ggcoxzph(addicts.zph[2], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-4,4))
#For methadone/
ggcoxzph(addicts.zph[3], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-0.5,0.5))
When we look at the plots of our Schoenfield Residuals, as well as our p-values, we can see that both methadone and prison variables are significantly proportional while our clinic variable violates our proportinality assumption at any reasonable \(\alpha\).
addicts.mart <- residuals(addicts.cox, type = "martingale")
addicts.cs <- addicts$status - addicts.mart
#Cumulative hazard
surv.csr <- survfit(Surv(addicts.cs, addicts$status) ~1, type = "fleming-harrington", data = addicts)
#plot(surv.csr, fun ="cumhaz")
cumHazPlot <-
ggsurvplot(
surv.csr,
fun = "cumhaz",
conf.int = TRUE,
palette = c("#581845"),
ggtheme = theme_solarized()
) + ggtitle("Cumulative Hazard for Cox-Snell Residuals")
#ggplotly(cumHazPlot[[1]])
ggplotly(cumHazPlot$plot + geom_abline())
We can see that since our Cox-Snell residuals curve fits a standard line quite well, we can say we do not observe a lack of fit.
#This time, without methadone
addicts.coxNoMeth <- coxph(surv_object ~ clinic + prison, data = addicts)
addicts.mart <- residuals(addicts.cox, type = "martingale")
lowessOBJ <- as.data.frame(lowess(addicts$methadone, addicts.mart))
ggplotly(
ggplot() + aes(addicts$methadone, addicts.mart) + geom_point(col = "#FFA000") + labs(x = "Methadone", y = "Martingale Residuals", title = "Martingale Residuals vs. Methadone Dosage") + geom_line(data = lowessOBJ, aes(x = x, y = y), col = "#388E3C") + theme_solarized()
)
#Create categorical methadone
addicts$catMeth <-
cut(addicts$methadone,
c(0, 60, 79, 200),
labels = c("<60", "60-79", "80+"))
addicts$catMeth <- as.factor(addicts$catMeth)
#New cox model
addicts.coxCatMeth <-
coxph(surv_object ~ clinic + prison + catMeth, data = addicts)
#We print AIC using drop1
# This is with methadone as quantitative
drop1(addicts.cox, test = "Chisq")
## Single term deletions
##
## Model:
## surv_object ~ clinic + prison + methadone
## Df AIC LRT Pr(>Chi)
## <none> 1352.5
## clinic 1 1376.9 26.3506 2.847e-07 ***
## prison 1 1354.3 3.7727 0.05209 .
## methadone 1 1381.3 30.7820 2.887e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#This is with methadone as categorical
drop1(addicts.coxCatMeth, test = "Chisq")
## Single term deletions
##
## Model:
## surv_object ~ clinic + prison + catMeth
## Df AIC LRT Pr(>Chi)
## <none> 1358.0
## clinic 1 1373.9 17.9570 2.259e-05 ***
## prison 1 1357.8 1.8475 0.1741
## catMeth 2 1381.3 27.3215 1.167e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can see that the model with methadone as a quantitative has a better AIC than the one with methadone as categorical. Along with that, the quantitative methadone model has has a smaller p-value for the ChiSq test, providing greater statistically significant strentgh. We can conclude that replacing the quantitavie methadone with a 3-category factor does matter, as it hurts the strength of our model.
# With Linear predictor
addicts.predict <- predict(addicts.cox)
#Deviance and df beta
addicts.dev <- residuals(addicts.cox, type = "deviance")
addicts.dfb <- residuals(addicts.cox, type = "dfbeta")
#MArtingale vs Linear Predictor
ggplotly(
ggplot() + aes(
x = addicts.predict,
y = addicts.mart,
label = rownames(addicts)
) + geom_point() + labs(x = "Linear Predictor", y = "Martingale Residual", title = "Martingale Residuals vs Linear Predictor") + theme_solarized()
)
# Deviance vs Linear Predictor
ggplotly(
ggplot() + aes(
x = addicts.predict,
y = addicts.dev,
label = rownames(addicts)
) + geom_point() + labs(x = "Linear Predictor", y = "Deviance Residual", title = "Deviance Residuals vs Linear Predictor") + theme_solarized()
)
# DfBeta vs Linear Predictor
# Clinic
ggplotly(
ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 1]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Clinic Type", title = "dfbeta Values for Observation Number by Clinic Type") + theme_solarized()
)
# Prison
ggplotly(
ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 2]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Prison Status", title = "dfbeta Values for Observation Number by Prison Status") + theme_solarized()
)
#Methadone
ggplotly(
ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 3]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Methadone Level", title = "dfbeta Values for Observation Number by Methadone Level") + theme_solarized()
)
addicts[c(8, 26),]
## id clinic status time prison methadone catMeth
## 8 8 Clinic1 0 796 Yes 60 <60
## 26 27 Clinic1 0 566 Yes 45 <60
Both of these observations were prisoners who were censored observations in our data. Given their long survival times, they could have both felt they no longer needed to be part of the study.